{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b658d5f8-236b-4448-9b60-e5afe6bb72e1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>question</th>\n",
       "      <th>answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>what does jamaican people speak?</td>\n",
       "      <td>[Jamaican Creole English Language, Jamaican En...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>what did james k polk do before he was president?</td>\n",
       "      <td>[Lawyer]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>what is the oregon ducks 2012 football schedule?</td>\n",
       "      <td>[University of Oregon]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>who plays ken barlow in coronation street?</td>\n",
       "      <td>[Tony Warren]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>what happened after mr. sugihara died?</td>\n",
       "      <td>[Yaotsu]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>who did mozart write his four horn concertos for?</td>\n",
       "      <td>[wolfgang amadeus mozart used story by pierre ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>where is jamarcus russell from?</td>\n",
       "      <td>[Mobile]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>where was george washington carver from?</td>\n",
       "      <td>[Diamond]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>what else did ben franklin invent?</td>\n",
       "      <td>[Lightning rod, Franklin stove, Bifocals, Glas...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>who was richard nixon married to?</td>\n",
       "      <td>[Pat Nixon]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            question  \\\n",
       "0                   what does jamaican people speak?   \n",
       "1  what did james k polk do before he was president?   \n",
       "2   what is the oregon ducks 2012 football schedule?   \n",
       "3         who plays ken barlow in coronation street?   \n",
       "4             what happened after mr. sugihara died?   \n",
       "5  who did mozart write his four horn concertos for?   \n",
       "6                    where is jamarcus russell from?   \n",
       "7           where was george washington carver from?   \n",
       "8                 what else did ben franklin invent?   \n",
       "9                  who was richard nixon married to?   \n",
       "\n",
       "                                              answer  \n",
       "0  [Jamaican Creole English Language, Jamaican En...  \n",
       "1                                           [Lawyer]  \n",
       "2                             [University of Oregon]  \n",
       "3                                      [Tony Warren]  \n",
       "4                                           [Yaotsu]  \n",
       "5  [wolfgang amadeus mozart used story by pierre ...  \n",
       "6                                           [Mobile]  \n",
       "7                                          [Diamond]  \n",
       "8  [Lightning rod, Franklin stove, Bifocals, Glas...  \n",
       "9                                        [Pat Nixon]  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "name = \"WebQ_sample\"\n",
    "df = pd.read_json(f\"./raw/{name}.jsonl\", lines=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f8f47b91-d09f-4087-a529-3f5f210b1370",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "output_all = list()\n",
    "for idx, line in df.iterrows():\n",
    "    output_all.append({\n",
    "        \"idx\": idx,\n",
    "        \"question\": line[\"question\"],\n",
    "        \"answer\": line['answer']\n",
    "    })\n",
    "pd.DataFrame(output_all).to_json(f\"./formatted/{name}_{df.shape[0]}.jsonl\", orient=\"records\", lines=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
